CHAPTER 10: Forecasting with Exponential Smoothing Models

I: Forecasting with moving average models

1: The simple moving average

library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.3.3
data("Coffee_Prices")

ts_info(Coffee_Prices)
##  The Coffee_Prices series is a mts object with 2 variables and 701 observations
##  Frequency: 12 
##  Start time: 1960 1 
##  End time: 2018 5
robusta <- Coffee_Prices[,1]

library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
ts_plot(robusta,
        title = "The Robusta Coffee Monthly Prices",
        Ytitle = "Price in USD",
        Xtitle = "Year")

2

library(tidyr)

sma_forecast <- function(df, h, m, w = NULL){
  
  # Error handling
  if(h > nrow(df)){
    stop("The length of the forecast horizon must be shorter than the length of the series")}
  
  if(m > nrow(df)){
    stop("The length of the rolling window must be shorter than the length of the series")}
  if(!is.null(w)){
    if(length(w) != m){
      stop("The weight argument is not aligned with the length of the rolling window")
    } else if(sum(w) !=1){
      stop("The sum of the average weight is different than 1")
    }
  }
  
  # Setting the average weigths
  if(is.null(w)){
    w <- rep(1/m, m)
  }
  
  # Setting the data frame 
  #-----------------------
  # Changing the Date object column name
  names(df)[1] <- "date" 
  # Setting the training and testing partition 
  # according to the forecast horizon
  df$type <- c(rep("train", nrow(df) - h), 
               rep("test", h)) 
  
  # Spreading the table by the partition type
  df1 <- df %>% spread(key = type, value = y)
  
  # Create the target variable
  df1$yhat <- df1$train
  
  
  # Simple moving average function
  for(i in (nrow(df1) - h + 1):nrow(df1)){
    r <- (i-m):(i-1) 
    df1$yhat[i] <- sum(df1$yhat[r] * w) 
  } 
  
  # dropping from the yhat variable the actual values
  # that were used for the rolling window
  df1$yhat <- ifelse(is.na(df1$test), NA, df1$yhat)
  
  df1$y <- ifelse(is.na(df1$test), df1$train, df1$test)
  
  return(df1)
}
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
robusta_df <- ts_to_prophet(robusta)

robusta_fc_m1 <-  sma_forecast(robusta_df, h = 24, m = 1)
robusta_fc_m6 <-  sma_forecast(robusta_df, h = 24, m = 6)
robusta_fc_m12 <- sma_forecast(robusta_df, h = 24, m = 12)
robusta_fc_m24 <- sma_forecast(robusta_df, h = 24, m = 24)
robusta_fc_m36 <- sma_forecast(robusta_df, h = 24, m = 36)
library(plotly)

plot_ly(data = robusta_df[650:nrow(robusta_df),], x = ~ ds, y = ~ y,
        type = "scatter", mode = "lines", 
        name = "Actual") %>%
  add_lines(x = robusta_fc_m1$date, y = robusta_fc_m1$yhat, 
            name = "SMA - 1", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m6$date, y = robusta_fc_m6$yhat, 
            name = "SMA - 6", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m12$date, y = robusta_fc_m12$yhat, 
            name = "SMA - 12", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m24$date, y = robusta_fc_m24$yhat, 
            name = "SMA - 24", line = list(dash = "dash")) %>%
  add_lines(x = robusta_fc_m36$date, y = robusta_fc_m36$yhat, 
            name = "SMA - 36", line = list(dash = "dash")) %>%
  layout(title = "Forecasting the Robusta Coffee Monthly Prices",
         xaxis = list(title = ""),
         yaxis = list(title = "USD per Kg."))

3: Weighted moving average

data(USgas)

USgas_df <- ts_to_prophet(USgas)
USgas_fc_m12a <- sma_forecast(USgas_df,
                              h = 24,
                              m = 12,
                              w = c(1, rep(0,11)))
USgas_fc_m12b <- sma_forecast(USgas_df,
                              h = 24,
                              m = 12,
                              w = c(0.8, rep(0,10), 0.2))
plot_ly(data = USgas_df[190:nrow(USgas_df),], x = ~ ds, y = ~ y,
        type = "scatter", mode = "lines", 
        name = "Actual") %>%
  add_lines(x = USgas_fc_m12a$date, y = USgas_fc_m12a$yhat, 
            name = "WMA - Seasonal Lag", line = list(dash = "dash")) %>%
  add_lines(x = USgas_fc_m12b$date, y = USgas_fc_m12b$yhat, 
            name = "WMA - 12 (0.2/0.8)", line = list(dash = "dash")) %>%
  layout(title = "Forecasting the Monthly Consumption of Natural Gas in the US",
         xaxis = list(title = ""),
         yaxis = list(title = "Billion Cubic Feet"))

II: Forecasting with exponential smoothing

1: Simple exponentian smoothing model

alpha_df <- data.frame(index = seq(from = 1, to = 15, by = 1),
                       power = seq(from = 14, to = 0, by = -1))

alpha_df$alpha_0.01 <- 0.01 * (1 - 0.01) ^ alpha_df$power
alpha_df$alpha_0.2 <- 0.2 * (1 - 0.2) ^ alpha_df$power
alpha_df$alpha_0.4 <- 0.4 * (1 - 0.4) ^ alpha_df$power
alpha_df$alpha_0.6 <- 0.6 * (1 - 0.6) ^ alpha_df$power
alpha_df$alpha_0.8 <- 0.8 * (1 - 0.8) ^ alpha_df$power
alpha_df$alpha_1 <- 1 * (1 - 1) ^ alpha_df$power

plot_ly(data = alpha_df) %>%
  add_lines(x = ~ index, y = ~ alpha_0.01, name = "alpha = 0.01") %>%
  add_lines(x = ~ index, y = ~ alpha_0.2, name = "alpha = 0.2") %>%
  add_lines(x = ~ index, y = ~ alpha_0.4, name = "alpha = 0.4") %>%
  add_lines(x = ~ index, y = ~ alpha_0.6, name = "alpha = 0.6") %>%
  add_lines(x = ~ index, y = ~ alpha_0.8, name = "alpha = 0.8") %>%
  add_lines(x = ~ index, y = ~ alpha_1, name = "alpha = 1") %>%
  layout(title = "Decay Rate of the SES Weights",
         xaxis = list(title = "Index"),
         yaxis = list(title = "Weight"))

2: Forecasting with the ses function

robusta_par <- ts_split(robusta, sample.out = 12)
train <- robusta_par$train
test <- robusta_par$test

library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
fc_ses <- ses(train, h = 12, initial = "optimal")

fc_ses$model
## Simple exponential smoothing 
## 
## Call:
##  ses(y = train, h = 12, initial = "optimal") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 0.6957 
## 
##   sigma:  0.161
## 
##      AIC     AICc      BIC 
## 1989.646 1989.681 2003.252
test_forecast(actual = robusta,
              forecast.obj = fc_ses,
              test = test) %>%
  layout(title = "Robusta Coffee Prices Forecast vs. Actual",
         xaxis = list(range = c(2010, max(time(robusta)))),
         yaxis = list(range = c(1, 3)))

3

plot_forecast(fc_ses) %>%
  add_lines(x = time(test) + deltat(test),
            y = as.numeric(test),
            name = "Testing Partition") %>%
  layout(title = "Robusta Coffee Prices Forecast vs. Actual",
         xaxis = list(range = c(2010, max(time(robusta)) +
                                  deltat(robusta))),
         yaxis = list(range = c(0, 4)))

5: Forecasting with the holt function

library(Quandl)
## Warning: package 'Quandl' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
gdp <- Quandl("FRED/GDP", start_date = "2010-01-01", type = "ts")

ts_info(gdp)
##  The gdp series is a ts object with 1 variable and 48 observations
##  Frequency: 4 
##  Start time: 2010 1 
##  End time: 2021 4
ts_plot(gdp, 
        title = "Gross Domestic Product",
        Ytitle = "Billions of Dollars",
        Xtitle = "Source: U.S. Bureau of Economic Analysis / fred.stlouisfed.org")

6

gdp_par <- ts_split(gdp, sample.out = 8)

train <- gdp_par$train
test <- gdp_par$test

fc_holt <- holt(train,  h = 8, initial = "optimal") 

fc_holt$model
## Holt's method 
## 
## Call:
##  holt(y = train, h = 8, initial = "optimal") 
## 
##   Smoothing parameters:
##     alpha = 0.9985 
##     beta  = 0.0941 
## 
##   Initial states:
##     l = 14603.0487 
##     b = 161.9411 
## 
##   sigma:  80.9869
## 
##      AIC     AICc      BIC 
## 504.8838 506.6485 513.3282
accuracy(fc_holt, test)
##                      ME       RMSE      MAE         MPE      MAPE       MASE
## Training set   10.99164   76.83093  61.7626  0.05241543 0.3460477 0.08790283
## Test set     -665.87355 1141.19097 854.8069 -3.29699460 4.0874667 1.21659297
##                   ACF1 Theil's U
## Training set 0.0192117        NA
## Test set     0.2975704   1.08055
test_forecast(gdp, forecast.obj = fc_holt, test = test)

7

fc_holt_exp <- holt(train,
                    h = 8,
                    beta = 0.75 ,
                    initial = "optimal",
                    exponential = TRUE) 

fc_holt_exp$model
## Holt's method with exponential trend 
## 
## Call:
##  holt(y = train, h = 8, initial = "optimal", exponential = TRUE,  
## 
##  Call:
##      beta = 0.75) 
## 
##   Smoothing parameters:
##     alpha = 0.781 
##     beta  = 0.75 
## 
##   Initial states:
##     l = 14572.2291 
##     b = 1.0142 
## 
##   sigma:  0.0053
## 
##      AIC     AICc      BIC 
## 515.9291 517.0720 522.6847
accuracy(fc_holt_exp, test)
##                       ME       RMSE       MAE         MPE      MAPE      MASE
## Training set   -2.792636   90.20753  74.14323 -0.01756593 0.4165413 0.1055234
## Test set     -703.014806 1151.21262 858.97402 -3.46212317 4.1130726 1.2225238
##                    ACF1 Theil's U
## Training set -0.1541725        NA
## Test set      0.2834645  1.090637
test_forecast(gdp, forecast.obj = fc_holt, test = test)

III: Holt-Winters model

1

data(USgas)

decompose(USgas) %>% plot()

2

USgas_par <- ts_split(USgas, 12)

train <- USgas_par$train
test <- USgas_par$test
md_hw <- HoltWinters(train)

md_hw
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = train)
## 
## Smoothing parameters:
##  alpha: 0.371213
##  beta : 0
##  gamma: 0.4422456
## 
## Coefficients:
##             [,1]
## a   2491.9930104
## b     -0.1287005
## s1    32.0972651
## s2   597.1088003
## s3   834.9628439
## s4   353.8593860
## s5   318.1927058
## s6  -173.0721496
## s7  -346.6229990
## s8  -329.7169608
## s9  -112.1664217
## s10 -140.3186476
## s11 -324.5343787
## s12 -243.9334551
fc_hw <- forecast(md_hw, h = 12)

accuracy(fc_hw, test)
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  7.828361 115.2062 87.67420 0.2991952 4.248131 0.7614222
## Test set     51.013877 115.1555 98.06531 1.7994297 3.766099 0.8516656
##                     ACF1 Theil's U
## Training set  0.21911103        NA
## Test set     -0.01991923 0.3652142
test_forecast(actual = USgas, 
              forecast.obj = fc_hw, 
              test = test)

3

# shallow_grid <-  ts_grid(train,
#                          model = "HoltWinters",
#                          periods = 6,
#                          window_space = 6,
#                          window_test = 12,
#                          hyper_params = list(alpha = seq(0,1,0.1),
#                                              beta = seq(0,1,0.1),
#                                              gamma = seq(0,1,0.1)),
#                          parallel = FALSE,
#                          n.cores = 8)
# shallow_grid$grid_df[1:10,]
# 
# plot_grid(shallow_grid)

4

# deep_grid <-  ts_grid(train, 
#                       model = "HoltWinters",
#                       periods = 6,
#                       window_space = 6,
#                       window_test = 12,
#                       hyper_params = list(alpha = seq(0.1,0.5,0.01),
#                                           beta = seq(0,0.1,0.01),
#                                           gamma = seq(0.2,0.4,0.01)),
#                       parallel = FALSE,
#                       n.cores = 8)
# plot_grid(deep_grid)

5

#plot_grid(deep_grid, type = "3D", top = 250)

6

# md_hw_grid <- HoltWinters(train, #
#                           alpha = deep_grid$alpha,
#                           beta = deep_grid$beta,
#                           gamma = deep_grid$gamma)
# 
# fc_hw_grid <- forecast(md_hw_grid, h = 12)
# 
# accuracy(fc_hw_grid, test)
# 
# test_forecast(actual = USgas, 
#               forecast.obj = fc_hw_grid, 
#               test = test)